#include <getopt.h>
#include <stdlib.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <set>
#include <string>
#include <sstream>
#include <vector>

#include "denovo_scanner.h"
#include "trio_denovo_scanner.h"
#include "../error.h"
#include "../pedigree.h"
#include "../stringops.h"
#include "../version.h"
#include "../vcf_reader.h"

bool file_exists(const std::string& path){
  return (access(path.c_str(), F_OK) != -1);
}

void print_usage(){
  std::cerr << "Usage: DenovoFinder --fam <fam_file> --str-vcf <str_gts.vcf.gz> --denovo-vcf <denovos.vcf.gz> [OPTIONS]" << "\n" << "\n"
    
	    << "Required parameters:" << "\n"
	    << "\t" << "--fam        <fam_file>            "  << "\t" << "FAM file containing pedigree information for samples of interest"                     << "\n"
	    << "\t" << "--str-vcf    <str_gts.vcf.gz>      "  << "\t" << "Bgzipped input VCF file containing STR genotypes previously generated by HipSTR"      << "\n"
	    << "\t" << "--denovo-vcf <denovos.vcf.gz>      "  << "\t" << "Bgzipped output VCF file containing likelihoods of de novo mutations"                 << "\n" << "\n"

	    << "Important optional parameters:" << "\n"
	    << "\t" << "--snp-vcf    <phased_snps.vcf.gz>  "  << "\t" << "Bgzipped input VCF file containing phased SNP genotypes for the samples."             << "\n"
	    << "\t" << "                                   "  << "\t" << " File should be identical to --snp-vcf argument provided to HipSTR during genotyping" << "\n"
	    << "\t" << "                                   "  << "\t" << " NOTE: This is the preferred option if you ran HipSTR with --snp-vcf and "            << "\n"
	    << "\t" << "                                   "  << "\t" << " your SNP haplotypes have only a few switch errors per chromosome"                    << "\n"
	    << "\t" << "                                   "  << "\t" << " If run with this option, DenovoFinder will jointly test all children"                << "\n"
	    << "\t" << "                                   "  << "\t" << " in each family and will leverage the SNP haplotype transmission information"         << "\n"
	    << "\t" << "                                   "  << "\t" << " If run without this option, DenovoFinder will individually test each child in each"  << "\n"
	    << "\t" << "                                   "  << "\t" << " family and will not be able to exploit the SNP haplotype transmission information"   << "\n"
	    << "\t" << "--uniform-prior                    "  << "\t" << "When computing the likelihoods, use uniform allele priors for the parental genotypes" << "\n"
	    << "\t" << "                                   "  << "\t" << " By default, allele priors are based on the allele frequencies in the VCF"            << "\n"
	    << "\t" << "                                   "  << "\t" << " Use this option if the VCF has too few samples to provide reliable priors"           << "\n" << "\n"

	    << "Optional output parameters:" << "\n"
	    << "\t" << "--log <log.txt>                  "  << "\t" << "Output the log information to the provided file (Default = Standard error)"             << "\n" << "\n"

	    << "Other optional parameters:" << "\n"
	    << "\t" << "--help                             "  << "\t" << "Print this help message and exit"                                                     << "\n"
	    << "\t" << "--chrom         <chrom>            "  << "\t" << "Only consider STRs on this chromosome"                                                << "\n"
	    << "\t" << "--haploid-chrs  <list_of_chroms>   "  << "\t" << "Comma separated list of chromosomes to treat as haploid (Default = all diploid)"      << "\n"
	    << "\t" << "--skip-snps     <snp_list.txt>     "  << "\t" << "File containing SNPs to omit from the analysis. Each line should contain a "          << "\n"
	    << "\t" << "                                   "  << "\t" << " position in the format CHROMOSOME:START"                                             << "\n"
	    << "\t" << "--version                          "  << "\t" << "Print DenovoFinder version and exit"                                                  << "\n"
	    << "\n";
}
  
void parse_command_line_args(int argc, char** argv, std::string& fam_file, std::string& snp_vcf_file, std::string& str_vcf_file, std::string& denovo_vcf_file,
			     std::string& chrom, std::string& log_file, std::string& haploid_chr_string, std::string& snp_skip_file, int& uniform_prior){
  if (argc == 1 || (argc == 2 && std::string("-h").compare(std::string(argv[1])) == 0)){
    print_usage();
    exit(0);
  }

  int print_help    = 0;
  int print_version = 0;

  static struct option long_options[] = {
    {"chrom",           required_argument, 0, 'c'},
    {"denovo-vcf",      required_argument, 0, 'd'},
    {"fam",             required_argument, 0, 'f'},
    {"log",             required_argument, 0, 'l'},
    {"h",               no_argument, &print_help, 1},
    {"help",            no_argument, &print_help, 1},
    {"version",         no_argument, &print_version, 1},
    {"uniform-prior",   no_argument, &uniform_prior, 1},
    {"skip-snps",       required_argument, 0, 'm'},
    {"str-vcf",         required_argument, 0, 'o'},
    {"haploid-chrs",    required_argument, 0, 't'},
    {"snp-vcf",         required_argument, 0, 'v'},
    {0, 0, 0, 0}
  };

  std::string filename;
  while (true){
    int option_index = 0;
    int c = getopt_long(argc, argv, "c:d:f:l:m:o:t:v:", long_options, &option_index);
    if (c == -1)
      break;

    switch(c){
    case 0:
      break;
    case 'c':
      chrom = std::string(optarg);
      break;
    case 'd':
      denovo_vcf_file = std::string(optarg);
      break;
    case 'f':
      fam_file = std::string(optarg);
      break;
    case 'l':
      log_file = std::string(optarg);
      break;
    case 'm':
      snp_skip_file = std::string(optarg);
      break;
    case 'o':
      str_vcf_file = std::string(optarg);
      break;
    case 't':
      haploid_chr_string = std::string(optarg);
      break;
    case 'v':
      snp_vcf_file = std::string(optarg);
      break;
    case '?':
      printErrorAndDie("Unrecognized command line option");
      break;
    default:
      abort();
      break;
    }
  }

  if (optind < argc) {
    std::stringstream msg;
    msg << "Did not recognize the following command line arguments:" << "\n";
    while (optind < argc)
      msg << "\t" << argv[optind++] << "\n";
    msg << "Please check your command line syntax or type ./DenovoFinder --help for additional information" << "\n";
    printErrorAndDie(msg.str());
  }

  if (print_version == 1){
    std::cerr << "DenovoFinder version " << VERSION << std::endl;
    exit(0);
  }
  if (print_help){
    print_usage();
    exit(0);
  }
}

void read_site_skip_list(const std::string& input_file, std::set<std::string>& sites_to_skip){
  sites_to_skip.clear();
  std::ifstream input(input_file.c_str());
  std::string line;
  while (std::getline(input, line))
    sites_to_skip.insert(line);
  input.close();
}

int main(int argc, char** argv){
  double total_time = clock();
  int uniform_prior = 0;

  std::stringstream full_command_ss;
  full_command_ss << "DenovoFinder-" << VERSION;
  for (int i = 1; i < argc; i++)
    full_command_ss << " " << argv[i];
  std::string full_command = full_command_ss.str();

  std::string fam_file = "", snp_vcf_file = "", str_vcf_file = "", denovo_vcf_file = "";
  std::string chrom = "", log_file = "", haploid_chr_string  = "", snp_skip_file = "";
  parse_command_line_args(argc, argv, fam_file, snp_vcf_file, str_vcf_file, denovo_vcf_file,
			  chrom, log_file, haploid_chr_string, snp_skip_file, uniform_prior);

  bool use_pop_priors = (uniform_prior == 0); // If true, we compute parental genotype priors from population frequencies
                                              // Otherwise, we use a uniform prior for each allele

  if (fam_file.empty())
    printErrorAndDie("--fam option required");
  else if (denovo_vcf_file.empty())
    printErrorAndDie("--denovo-vcf option required");
  else if (str_vcf_file.empty())
    printErrorAndDie("--str-vcf option required");

  // Check that the FAM file exists
  if (!file_exists(fam_file))
    printErrorAndDie("FAM file " + fam_file + " does not exist. Please ensure that the path provided to --fam is valid");

  // Check that the STR VCF file exists, has a tabix index and then open it
  if (!file_exists(str_vcf_file))
    printErrorAndDie("STR VCF file " + str_vcf_file + " does not exist. Please ensure that the path provided to --str-vcf is valid");
  if (!file_exists(str_vcf_file + ".tbi"))
    printErrorAndDie("No .tbi index found for the STR VCF file. Please index using tabix and rerun DenovoFinder");
  VCF::VCFReader str_vcf(str_vcf_file);
  
  // Restrict the analysis to a given chromosome, if requested
  if (!chrom.empty())
    if (!str_vcf.set_region(chrom, 0))
      printErrorAndDie("Failed to set the region to chromosome " + chrom + " in the STR VCF. Please check the STR VCF and rerun the analysis");

  std::ofstream log_;
  if (!log_file.empty()){
    log_.open(log_file, std::ofstream::out);
  if (!log_.is_open())
    printErrorAndDie("Failed to open the log file: " + log_file);
  }

  if (!haploid_chr_string.empty()){
    std::vector<std::string> haploid_chroms;
    split_by_delim(haploid_chr_string, ',', haploid_chroms);
    printErrorAndDie("DenovoFinder does not currently support haploid chromosomes. We apologize for the inconvenience");
  }

  std::ostream& logger = (log_file.empty() ?  std::cerr : log_);

  if (!snp_vcf_file.empty()){
    // Check that the SNP VCF file exists, has a tabix index and then open it
    if (!file_exists(snp_vcf_file))
      printErrorAndDie("SNP VCF file " + snp_vcf_file + " does not exist. Please ensure that the path provided to --snp-vcf is valid");
    if (!file_exists(snp_vcf_file + ".tbi"))
      printErrorAndDie("No .tbi index found for the SNP VCF file. Please index using tabix and rerun DenovoFinder");
    VCF::VCFReader snp_vcf(snp_vcf_file);

    // Determine which samples have both SNP and STR data
    std::set<std::string> samples_with_data;
    std::set<std::string> str_samples(str_vcf.get_samples().begin(), str_vcf.get_samples().end());
    for (auto sample_iter = snp_vcf.get_samples().begin(); sample_iter != snp_vcf.get_samples().end(); sample_iter++)
      if (str_samples.find(*sample_iter) != str_samples.end())
	samples_with_data.insert(*sample_iter);

    // Extract the nuclear families with both types of data
    std::vector<NuclearFamily> families;
    extract_pedigree_nuclear_families(fam_file, samples_with_data, families, logger);
    logger << "\tOnly the nuclear families will undergo de novo analysis\n";

    // Read a list of sites to skip
    std::set<std::string> sites_to_skip;
    if (!snp_skip_file.empty())
      read_site_skip_list(snp_skip_file, sites_to_skip);

    // Scan for de novos, and output the results to a VCF
    logger << "\tJointly testing all children in each family for de novo mutations" << "\n"
	   << "\tPlease ensure that phased genotype likelihoods (FORMAT = PHASEDGL) are available in the VCF\n" << std::endl;
    DenovoScanner denovo_scanner(families, denovo_vcf_file, full_command, use_pop_priors);
    denovo_scanner.scan(snp_vcf_file, str_vcf, sites_to_skip, logger);
    denovo_scanner.finish();
  }
  else {
    // Extract the nuclear families with STR data
    std::set<std::string> str_samples(str_vcf.get_samples().begin(), str_vcf.get_samples().end());
    std::vector<NuclearFamily> families;
    extract_pedigree_nuclear_families(fam_file, str_samples, families, logger);
    logger << "\tOnly the nuclear families will undergo de novo analysis\n";

    // Scan for de novos using the trio approach
    logger << "\tIndividually testing each child in each family for de novo mutations" << "\n"
	   << "\tPlease ensure that genotype likelihoods (FORMAT = GL) are available in the VCF\n" << std::endl;
    TrioDenovoScanner denovo_scanner(families, denovo_vcf_file, full_command, use_pop_priors);
    denovo_scanner.scan(str_vcf, logger);
    denovo_scanner.finish();
  }

  total_time = (clock() - total_time)/CLOCKS_PER_SEC;
  logger << "DenovoFinder execution finished: Total runtime = " << total_time << " sec" << std::endl;

  if (!log_file.empty())
    log_.close();

  return 0;
}
